*1_Wellman___APSR___clean
*July 21, 2020

***********************************
*This File creates: Polity2p5.dta, ICTD.dta, FormerColony.dta, VDem.dta, DPI2017cleaned.dta, Yeardem.dta, WDI.dta, and WDI_Remit_Diaspora.dta to merge with Wellman_APSR_base in Wellman_APSR_merge.do 
***********************************

*Set the working directory to the main folder with the data files*

// Define this as a local variable in case you change the path/filename later.
loc stateid_crosswalk "z_stateid_crosswalk.do"

*********************
*Creating Polity2p5.dta*
*http://www.systemicpeace.org/inscrdata.html
*********************
clear
import excel p5v2018.xls, sheet("p5v2018") firstrow
rename scode politycode
keep politycode country year polity2
replace politycode="ETI" if politycode=="ETH"
replace politycode="SUD" if politycode=="SDN" & year>=2011
rename polity2 polity2v5
la var polity2v5 "Polity2 (Polity5)"

run `stateid_crosswalk'

sort stateid year
by stateid: gen polity2lag = polity2v5[_n-1]
rename polity2lag Polity2
la var Polity2 "Political Regime"
save "Polity2p5.dta", replace

*********************
*Creating ICTD.dta*
*https://www.wider.unu.edu/project/government-revenue-dataset 
*********************
clear
use Merged.dta
keep country iso reg year rev_ex_gr_ex_sc
run  `stateid_crosswalk'
drop iso

rename rev_ex_gr_ex_sc TaxGDP 
bysort stateid: ipolate TaxGDP year, g(TaxGDP_ipo)
gen TaxGDP2 = TaxGDP_ipo/100
sort stateid year
by stateid: gen TaxGDPlag2 = TaxGDP2[_n-1]
gen TaxGDPlag2ln = log(TaxGDPlag2)
la var TaxGDP2 "TaxGDP/100"
la var TaxGDPlag2 "TaxGDP2, 1yr lag"
la var TaxGDPlag2ln "(ln)State Capacity" 
rename TaxGDPlag2ln TaxGDPlag2ln2020
la var TaxGDPlag2ln2020 "(ln)State Capacity"  
save ICTD.dta, replace 

*********************
*Creating FormerColony.dta*
*https://www.danieltreisman.org/articles 
*********************
clear
import excel what_have_we_learned_data.xls, sheet("Data") firstrow
keep country wbcode frencol spanporc
rename wbcode wdicode

replace country="Ghana" if country=="GHA"
replace country="Botswana" if country=="BOTSWA"
replace country="Burkina Faso" if country=="BURKI FASO"
replace country="Namibia" if country=="MIBIA"
replace wdicode="NAM" if country=="Namibia" 

replace country = proper(country) 
run  `stateid_crosswalk'

replace stateid="STP" if wdicode=="STP"
replace frencol=1 if frencol==2
replace frencol=1 if country=="GUINEA"
replace spanporc=0 if country=="GUINEA" 
gen frenchpor= frencol + spanporc
replace frenchpor=0 if country=="Namibia"
rename frencol FrenCol
la var FrenCol "Former French Colony" 
la var frenchpor "Frmr. Portuguese or French Colony" 
drop country 
save FormerColony.dta, replace

*********************
*Creating VDem.dta*
*https://www.v-dem.net/en/data/data-version-10/
*********************
use country_name year v2elembcap v2elembcap_ord using V-Dem-CY-Core-v10.dta, clear
run  `stateid_crosswalk'
replace stateid="CON" if country_name=="Republic of the Congo"
sort stateid year
by stateid: gen EMBCaplag = v2elembcap[_n-1]
rename EMBCaplag EMBcapacity 
la var v2elembcap "EMB capacity (relative scale)"
la var v2elembcap_ord "EMB capacity (ordinal scale)"
rename EMBcapacity EMBcapacity2020 
la var EMBcapacity2020 "EMB capacity"
save VDem.dta, replace 

*********************
*Creating DPI2017cleaned.dta*
*https://www.v-dem.net/en/data/data-version-10/
* Should this be: https://mydata.iadb.org/Reform-Modernization-of-the-State/Database-of-Political-Institutions-2017/938i-s2bw?
*********************
insheet using Database_of_Political_Institutions_2017.csv, comma clear
keep countryname ifs year execme pr

rename ifs dpicode
replace country = "Cape Verde" if dpicode=="CPV"
replace country = "South Africa" if dpicode=="ZAF"
replace country = "Congo-Brazzaville" if country=="Congo" 
replace country = "Comoros" if country=="Comoro Is." 
replace country = "Equatorial Guinea" if country=="Eq. Guinea" 
replace country = "Democratic Republic of Congo" if country=="Congo (DRC)" 
replace country = "Central African Republic" if country=="Cent. Af. Rep." 
*DPI is Jan 1 so need to adjust year 1 behind*
rename year oldyear
gen year = oldyear-1
rename countryname country
run  `stateid_crosswalk'
drop oldyear
*cleaning inconsistent -999*
replace pr=1 if country=="Chad"
replace pr=1 if country=="Guinea-Bissau"
replace pr=1 if country=="Guinea"
save DPI2017cleaned.dta, replace

*********************
*Creating Yeardem.dta*
*https://www.journals.uchicago.edu/doi/suppl/10.1086/700936
*********************
use country_name wbcode year yeardem using "DDCGdata_final.dta", clear
rename country_name country
run `stateid_crosswalk'
save Yeardem.dta, replace

*********************
*Creating WDI.dta*
*https://databank.worldbank.org/data/reports.aspx?source=world-development-indicators#
*********************
import excel Data_Extract_From_World_Development_Indicators.xlsx, sheet("Data") firstrow clear
sort CountryName Time
rename Time year
drop in 1/5
destring year, replace
drop TimeCode
rename CountryName country
rename CountryCode wdicode
rename PopulationtotalSPPOPTOTL WBpop
run `stateid_crosswalk'

rename Personalremittancesreceived RemitGDP 
replace RemitGDP="." if RemitGDP==".."
destring RemitGDP, replace
destring year, replace
replace WBpop="." if WBpop==".."
destring, replace
gen RemitPctGDP = RemitGDP/100
sort stateid year
by stateid: gen RemitPctGDPlag = RemitPctGDP[_n-1]
gen RemitPctGDPlagln = log(RemitPctGDPlag)
la var RemitPctGDP "RemitGDP/100"
la var RemitPctGDPlag "RemitPctGDP, 1yr lag"
la var RemitPctGDPlagln "(ln)RemitPctGDP" 
save WDI.dta, replace 

*********************
*Creating WDI_Remit_Diaspora.dta*
*https://www.worldbank.org/en/topic/migrationremittancesdiasporaissues/brief/migration-remittances-data
*z_leblang_diaspora.dta courtesy of David Leblang 
*********************
import excel bilateralmigrationmatrix20130.xlsx, sheet("Bilateral Migration 2013") clear
drop in 1
drop in 215/220
keep A HJ
rename A country
rename HJ diaspora
//destring diaspora, replace
cap drop temp 
gen temp = real(diaspora)
drop diaspora
rename temp diaspora
gen year=2013
run  `stateid_crosswalk'
save WBdiaspora2013.dta, replace

clear
import excel bilateralmigrationmatrix20170_Apr2018.xlsx, sheet("Bilateral Migration 2017")
drop in 1/2
drop in 215/220
keep A HJ
rename A country
rename HJ diaspora
cap drop temp 
gen temp = real(diaspora)
drop diaspora
rename temp diaspora
//destring diaspora, replace
gen year=2017
run  `stateid_crosswalk'
save WBdiaspora2017.dta, replace

clear
use z_leblang_diaspora.dta
append using WBdiaspora2013.dta
append using WBdiaspora2017.dta
save leblang_diaspora_plus.dta, replace 

clear
use WDI.dta
merge m:m stateid year using "leblang_diaspora_plus.dta"
drop if _m==2
drop _m
drop if year==2018
bysort stateid: ipolate diaspora year, g(diaspora_ipo)

gen DiasPop2020 = diaspora_ipo/WBpop
sort stateid year
by stateid: gen DiasPoplag2020 = DiasPop2020[_n-1]
gen DiasPoplagln2020 = log(DiasPoplag2020)
la var DiasPop2020 "Relative Diaspora Size"
la var DiasPoplag2020 "DiasPop"
la var DiasPoplagln2020 "(ln)Relative Diaspora Size" 

save WDI_Remit_Diaspora.dta, replace 

********
clear
